library(plyr)
library(reshape2)
library(readstata13)
suppressMessages(library(tidyverse))

# Build the path to a file inside the raw replication-data directory.
#
# file: path of the data file, relative to "raw-data/replication-raw-data/".
# Returns that path with the directory prefix prepended (vectorised over file).
rep_path <- function(file) {
  raw_dir <- "raw-data/replication-raw-data/"
  paste(raw_dir, file, sep = "")
}


#ariga_2015
load(file = rep_path("ariga_2015/IncDisadvJPN.RData"))

# The data cover elections from 1958 to 1993. The design needs three
# consecutive elections (t-1, t, t+1): it estimates the effect of winning in t
# (i.e. incumbency in t+1) on the outcome in t+1, with t-1 used for the
# placebo test.
#
# Sample restrictions, applied in the same order as the original analysis:
#   * drop t = 1958 (no t-1) and t = 1993 (no t+1);
#   * keep districts with 3 to 5 seats.
# which() drops rows where the condition evaluates to NA.
LDP <- LDP[which(LDP$year != 1958 & LDP$year != 1993 &
                   LDP$dm >= 3 & LDP$dm <= 5), ]
#   * drop observations with redistricting between t-1 and t or t and t+1;
#   * drop candidates who switched party.
LDP <- subset(
  LDP,
  after.redist != 1 & next.after.redist != 1 & next.ptyid == "LDP"
)
#   * keep districts where intra-party competition took place in t+1.
LDP <- subset(
  LDP,
  next.d.numinc >= 1 & next.d.numcan.samepty > next.d.numinc
)

write.csv(LDP, file = "data/cl-replication-data/ariga_2015.csv")

#boas_hidalgo_2011
# The replication archive is loaded and `applied.data` is exported unchanged.
load(file = rep_path("boas_hidalgo_2011/replication.RData"))

write.csv(
  applied.data,
  file = "data/cl-replication-data/boas_hidalgo_2011.csv"
)

#boas_etal_2014:

load(file = rep_path("boas_etal_2014/spoils_of_victory_replication_data.RData"))

# Outcome: log of (contracts.0810 + 1) divided by donor.firms.
depfed_data <- mutate(
  depfed_data,
  outcome_var = log((contracts.0810 + 1) / donor.firms)
)

write.csv(depfed_data, file = "data/cl-replication-data/boas_etal_2014.csv")

#bohlken_2018:

load(file = rep_path("bohlken_2018/BIMARU_MPLADS_AJPS_all.RData"))

# Drop rows with a missing post-election indicator or a missing forcing
# variable. (`table` is the data frame supplied by the .RData file.)
table <- table[!is.na(table$project_aft_elec_2yr) & !is.na(table$forcing), ]

# Analysis sample: rows with project_aft_elec_2yr == 1, gp_missing == 0 and
# matched == 1.
bohlken_analysis_df <- filter(
  table,
  project_aft_elec_2yr == 1 & gp_missing == 0 & matched == 1
)

write.csv(bohlken_analysis_df, file = "data/cl-replication-data/bohlken_2018.csv")

#broockman_ryan_2016:

# Read the merged CCES file and coerce the analysis variables: contact and
# rspartywinperc to numeric (via character), distname to character.
broock_ryan_df <- read_csv(rep_path("broockman_ryan_2016/cces_2008_working_merged_1.csv")) %>%
  mutate(
    contact = as.numeric(as.character(contact)),
    rspartywinperc = as.numeric(as.character(rspartywinperc)),
    distname = as.character(distname)
  )

write.csv(broock_ryan_df, file = "data/cl-replication-data/broockman_ryan_2016.csv")

#caughey_etal_2017:


# Recode `var` from a list of recode specifications.
#
# var:              vector to recode.
# recode.ls:        list of character vectors; the elements of each vector are
#                   single-quoted and joined with "=", and the resulting
#                   pieces are joined with ";" to form the recode string.
# as.factor.result: forwarded to Recode() (previously ignored: TRUE was always
#                   passed regardless of this argument).
# ...:              further arguments forwarded to Recode().
#
# Prints the assembled recode string, then returns the value of Recode().
# NOTE(review): Recode() is not defined or attached in this file; presumably
# it is car::Recode -- confirm that the package is loaded before calling.
myRecode <- function(var, recode.ls, as.factor.result=TRUE, ...) {
  # sQuote() must emit plain ASCII quotes; restore the caller's setting on
  # exit (also on error) instead of forcing it to TRUE afterwards.
  old_opts <- options(useFancyQuotes = FALSE)
  on.exit(options(old_opts), add = TRUE)

  pieces <- vapply(
    recode.ls,
    function(x) paste(sQuote(x), collapse = "="),
    character(1)
  )
  recodes <- paste(pieces, collapse = ";")
  print(recodes)
  Recode(var, recodes, as.factor.result = as.factor.result, ...)
}
# Fit a model with ols() on a subset of `data` and pass the fit to robcov().
#
# ...:     forwarded to ols() (e.g. the model formula).
# data:    data frame holding the model variables.
# subset:  row selector applied to both `data` and `cluster`.
# cluster: cluster identifier per row of `data` (default: one per row).
# method:  forwarded to robcov().
# NOTE(review): ols() and robcov() are not attached in this file; presumably
# they come from the rms package -- confirm before use.
rols <- function (..., data, subset=TRUE, cluster=1:nrow(data),
                  method="huber") {
  fit <- ols(..., data = data[subset, ], x = TRUE)
  cluster_ids <- cluster[subset]
  robcov(fit, cluster = cluster_ids, method = method)
}
# Concatenate the arguments (no separator) into R code and evaluate it in the
# caller's environment.
#
# ...:   pieces of code text.
# print: if TRUE, echo the assembled code before evaluating it.
# Returns the value of the evaluated code.
PasteEval <- function(..., print = FALSE) {
  code_text <- paste0(...)
  if (print) {
    cat("\n", code_text, "\n")
  }
  caller_env <- parent.frame()
  eval(parse(text = code_text), envir = caller_env)
}
# Build a dated file name "<path><yymmdd><name>-<letter>.<ext>", using the
# first letter (a, b, c, ...) whose file does not exist yet.
#
# path:    directory prefix, pasted verbatim (include the trailing "/").
# name:    base name; vectors are collapsed without a separator.
# ext:     file extension without the dot (NULL leaves a bare trailing ".").
# replace: if TRUE, return the latest *existing* name (the letter before the
#          first free one) so that file gets overwritten; if no file exists
#          yet the "-a" name is returned.
#
# Prints the chosen name and returns it. If all 26 letters are taken, the
# "-z" name is returned.
#
# The existence check uses file.exists() on the full candidate path. The
# previous regex match against list.files() of the working directory could
# never match once `path` was non-empty and treated "." as a wildcard.
FileName <- function (path=NULL, name=NULL, ext=NULL, replace=FALSE) {
  p <- paste(path, collapse = "")
  n <- paste(name, collapse = "")
  e <- paste0(".", ext)
  d <- format(Sys.Date(), "%y%m%d")
  for (l in seq_along(letters)) {
    if (l > 1) old_file_name <- file_name
    file_name <- paste0(p, d, n, "-", letters[l], e)
    if (!any(file.exists(file_name))) {
      if (replace && l > 1) file_name <- old_file_name
      break
    }
  }
  cat("\nFile name:", file_name, "\n\n")
  return(file_name)
}

# Join the arguments with "/" and make the result the working directory.
# Returns (invisibly) whatever setwd() returns, i.e. the previous directory.
mysw <- function (...) {
  target_dir <- paste(..., sep = "/")
  setwd(target_dir)
}
# Open a pdf() device writing to "<name><yymmdd>.pdf" (today's date).
#
# name: file-name stem; vectors are collapsed without a separator.
# ...:  forwarded to pdf().
mypdf <- function(name, ...) {
  stamp <- format(Sys.Date(), "%y%m%d")
  stem <- paste(name, collapse = "")
  pdf(paste0(stem, stamp, ".pdf"), ...)
}
# Triangular weight: 1 at x == c, falling linearly to 0 at distance h from c
# and staying 0 beyond that.
tri <- function (x, h, c=0) {
  scaled_dist <- abs((x - c) / h)
  pmax(0, 1 - scaled_dist)
}
# Look up the South11 value for each entry of `pos`, matched against the
# POAbrv column of `dta` (NA where there is no match).
POtoSouth11 <- function (pos, dta=st.info) {
  row_idx <- match(pos, dta$POAbrv)
  dta$South11[row_idx]
}



# State lookup table (used as the default `dta` of POtoSouth11()).
st.info <- read.table(file = rep_path("caughey_etal_2017/StateCodes.tab"))


load(file = rep_path("caughey_etal_2017/party_control_data_161121.RData"))

# Sort by state and year, then keep rows with a state abbreviation from 1936
# onwards.
data.use <- dplyr::filter(
  plyr::arrange(data, abb, year),
  !is.na(abb) & year >= 1936
)

table(data.use$year)


#Setup RDD Variables

# All lags and leads are taken within state (abb); the rows were sorted by
# abb and year when data.use was built. Columns are created in the same order
# as before so the exported CSV layout is unchanged.
data.use <- data.use %>%
  group_by(abb) %>%
  # Party-control measures.
  mutate(
    DemGov = gov_party,
    DemPropGov0 = demprop2 - 1/2,
    DemMarginGov = DemPropGov0 * 2,
    DemWinGov = as.integer(DemPropGov0 > 0),
    DemSeatShareHouse = hs_dem_per_2pty / 100,
    DemSeatShareSenate = sen_dem_per_2pty / 100,
    DemControlHouse = hs_dem_control,
    DemControlSenate = sen_dem_control
  ) %>%
  # Policy measure and its lags.
  mutate(
    GovLib = Policy,
    GovLibL1 = lag(GovLib, 1),
    GovLibL2 = lag(GovLib, 2),
    GovLibL3 = lag(GovLib, 3),
    GovLibL4 = lag(GovLib, 4),
    GovLibL12 = (GovLibL1 + GovLibL2) / 2
  ) %>%
  # Leads of the policy measure.
  mutate(
    GovLibP1 = lead(GovLib, 1),
    GovLibP2 = lead(GovLib, 2),
    GovLibP3 = lead(GovLib, 3),
    GovLibP4 = lead(GovLib, 4),
    GovLibP12 = (GovLibP1 + GovLibP2) / 2
  ) %>%
  # Changes: D<k> is lead k minus the current value, D12 averages D1 and D2,
  # DL1 is the current value minus lag 1.
  mutate(
    GovLibD1 = GovLibP1 - GovLib,
    GovLibD2 = GovLibP2 - GovLib,
    GovLibD3 = GovLibP3 - GovLib,
    GovLibD4 = GovLibP4 - GovLib,
    GovLibD12 = (GovLibD1 + GovLibD2)/2,
    GovLibDL1 = GovLib - GovLibL1
  ) %>%
  ungroup()


# Residualise GovLib on state/year fixed effects and/or lagged policy.
res.fe <- residuals(lm(GovLib ~ abb + factor(year), data = data.use))
res.l1 <- residuals(lm(GovLib ~ Policy_L1, data = data.use))
res.l2 <- residuals(lm(GovLib ~ Policy_L2, data = data.use))
res.fe.l1 <- residuals(lm(GovLib ~ abb + factor(year) + GovLibL1, data = data.use))
res.fe.l2 <- residuals(lm(GovLib ~ abb + factor(year) + GovLibL2, data = data.use))

# Store each residual vector in its own column. The names of a residual
# vector are the row numbers lm() actually used, so rows dropped from a fit
# keep NA. Columns are added in the same order as before.
resid_columns <- list(
  GovLib.fe0 = res.fe,
  GovLib.fe1 = res.fe.l1,
  GovLib.fe2 = res.fe.l2,
  GovLib.1 = res.l1,
  GovLib.2 = res.l2
)
for (resid_col in names(resid_columns)) {
  resid_values <- resid_columns[[resid_col]]
  data.use[[resid_col]] <- NA
  data.use[[resid_col]][as.integer(names(resid_values))] <- resid_values
}

# Within-state lags (L1, L2) and leads (P1, P2) of every residualised series.
data.use <- data.use %>%
  group_by(abb) %>%
  mutate(
    GovLib.1L1 = lag(GovLib.1, 1),
    GovLib.1L2 = lag(GovLib.1, 2),
    GovLib.2L1 = lag(GovLib.2, 1),
    GovLib.2L2 = lag(GovLib.2, 2),
    GovLib.fe0L1 = lag(GovLib.fe0, 1),
    GovLib.fe0L2 = lag(GovLib.fe0, 2),
    GovLib.fe1L1 = lag(GovLib.fe1, 1),
    GovLib.fe1L2 = lag(GovLib.fe1, 2),
    GovLib.fe2L1 = lag(GovLib.fe2, 1),
    GovLib.fe2L2 = lag(GovLib.fe2, 2)
  ) %>%
  mutate(
    GovLib.1P1 = lead(GovLib.1, 1),
    GovLib.1P2 = lead(GovLib.1, 2),
    GovLib.2P1 = lead(GovLib.2, 1),
    GovLib.2P2 = lead(GovLib.2, 2),
    GovLib.fe0P1 = lead(GovLib.fe0, 1),
    GovLib.fe0P2 = lead(GovLib.fe0, 2),
    GovLib.fe1P1 = lead(GovLib.fe1, 1),
    GovLib.fe1P2 = lead(GovLib.fe1, 2),
    GovLib.fe2P1 = lead(GovLib.fe2, 1),
    GovLib.fe2P2 = lead(GovLib.fe2, 2)
  )

write.csv(data.use, file = "data/cl-replication-data/caughey_etal_2017.csv")

#carson_sievert_2017:

d <- read.csv(file = rep_path("carson_sievert_2017/fig1_fullbandwidth.csv"))

table(d$elecdate[d$margin < 5])

# party is coded -1 (Whig/Rep won) or 1 (Dem won); recode to a 0/1
# Democratic-win indicator, NA for any other value.
d$dem_seat <- as.numeric(
  ifelse(d$party == 1, 1, ifelse(d$party == -1, 0, NA))
)

# November elections only; the running variable is dvp centred at 50.
nov_df <- mutate(filter(d, elecdate == "nov"), rv = dvp - 50)

write.csv(nov_df, file = "data/cl-replication-data/carson_sievert_2017.csv")

#cavaille_marshall_2018

# The Stata file is exported unchanged.
cav_marshall_df <- read.dta13(file = rep_path("cavaille_marshall_2018/Main_Dataset.dta"))

write.csv(
  cav_marshall_df,
  file = "data/cl-replication-data/cavaille_marshall_2018.csv",
  row.names = FALSE
)



#coppock_green_2016:

load(file = rep_path("coppock_green_2016/USA.habit.rdata"))

# States of interest.
states <- c("AR","CT","IA","IL","FL", "KY", "MO", "MT","NJ", "NV", "NY", "OK","OR","PA","RI")

# Restrict to those states, then keep only the three analysis variables.
coppock_green_df <- filter(USA, state %in% states)
analysis_vars <- c("voted10", "days_08", "voted08")
coppock_green_df <- coppock_green_df[, analysis_vars]

write.csv(coppock_green_df, file = "data/cl-replication-data/coppock_green_2016.csv")

#dahlgard_2018

load(file = rep_path("dahlgard_2018/agg_day_year_all.RData"))
load(file = rep_path("dahlgard_2018/n_day_year_all.RData"))

# Attach the cell sizes to the aggregated turnout rates by (days, year).
data <- data_day_year %>%
  ungroup() %>%
  left_join(ungroup(data_n_year), by = c("days", "year"))

# Turn each cell's turnout rate into counts of voters and abstainers;
# treated marks cells with days > 0.
data_close <- select(
  mutate(
    data,
    voted     = round(par_vote * n),
    abstained = round((1 - par_vote) * n),
    treated   = days > 0
  ),
  year, days, treated, voted, abstained
)

# Expand to one row per individual: every cell is repeated `voted` times with
# voted = 1 and `abstained` times with voted = 0.
data_voted <- mutate(
  as.data.frame(lapply(data_close, rep, times = data_close[["voted"]])),
  voted = 1
)

data_abstained <- mutate(
  as.data.frame(lapply(data_close, rep, times = data_close[["abstained"]])),
  voted = 0
)

data <- rbind(data_voted, data_abstained)

## One file per estimate code: each keeps a single election year of the
## individual-level data and only the columns voted and days.

#dahlgard_2018a

data_2009 <- filter(data, year == 2009)[, c("voted", "days")]

write.csv(data_2009, file = "data/cl-replication-data/dahlgard_2018a.csv", row.names = FALSE)

#dahlgard_2018b

data_2013 <- filter(data, year == 2013)[, c("voted", "days")]

write.csv(data_2013, file = "data/cl-replication-data/dahlgard_2018b.csv", row.names = FALSE)

#dahlgard_2018c

data_2014 <- filter(data, year == 2014)[, c("voted", "days")]

write.csv(data_2014, file = "data/cl-replication-data/dahlgard_2018c.csv", row.names = FALSE)

#dahlgard_2018d

data_2015 <- filter(data, year == 2015)[, c("voted", "days")]

write.csv(data_2015, file = "data/cl-replication-data/dahlgard_2018d.csv", row.names = FALSE)

#dbk_2018:


# Elections data from de Benedictis-Kessner & Warshaw, 2016, ``Mayoral Partisanship and Municipal Fiscal Policy", Journal of Politics 78(4): 1124-1138. Updated with additional cities under 75,000 in population from Ferreira & Gyourko and to fill in temporal holes in existing cities' data, and merged with ICMA institutions data.
load(rep_path("dbk_2018/mayoral_elecs_updated20170413.RData"))

data <- x

## Media data scraped from Library of Congress:

load(rep_path("dbk_2018/city_papers_20k.RData"))

media <- x


#############################
#### Set up incumbent VS ####
#############################
library(stringr); library(reshape2)
library(tools)

#fix column names

# Drop the first two characters and the last character of every name.
# NOTE(review): presumably the names arrive wrapped like b'...' from a Python
# export -- confirm against the .RData files.
# Works for any number of columns (the previous version hard-coded 55 columns
# for the elections data and 7 for the media data).
trim_name_wrapper <- function(nm) {
  nm <- substring(nm, 3)
  substr(nm, 1, nchar(nm) - 1)
}

names(data) <- trim_name_wrapper(names(data))
names(media) <- trim_name_wrapper(names(media))

###########
##### Construct incumbency and VS measures:
###########

# fix month var: wherever an election date is present, overwrite month with
# the two-digit month extracted from a YYYY-MM-DD date; rows without a date
# keep their existing month value.
dated_rows <- which(!is.na(data$elecdate))
if (length(dated_rows) > 0) {
  data$month[dated_rows] <- gsub(
    "\\d{4}-(\\d{2})-\\d{2}", "\\1",
    x = data$elecdate[dated_rows]
  )
}
sum(is.na(data$month)) # 2681 NAs

# For every election row, find the same city's next election and record
#   * elec_index_next / YearData_next: identifiers of that election;
#   * mayor_run_next / runnerup_run_next: 1 if this row's winner / runner-up
#     appears as winner or runner-up in the next election, 0 if not;
#   * mayor_next_VS / runnerup_next_VS: that candidate's share of the
#     two-candidate vote in the next election;
#   * use2: 1 if at least one of the two candidates reappears, 0 otherwise.
# The statements are order-dependent and the code is kept exactly as in the
# replication materials (including the deliberate early `break` below).
data$use2 <- NA

data$mayor_next_VS <- NA
data$runnerup_next_VS <- NA
data$mayor_run_next <- NA
data$runnerup_run_next <- NA
data$elec_index_next <- NA
data$YearData_next <- NA
nextelection <- NULL
for(i in 1:nrow(data)){
  # All rows for this city, then those with a strictly later YearData.
  thiscity <- data[which(data$fips==data$fips[i]),]
  futureyears <- thiscity[which(thiscity$YearData > data$YearData[i]),]
  if(nrow(futureyears)>0){
    # Next election = earliest later year, ties broken by month.
    nextelection <- futureyears[order(futureyears$YearData,futureyears$month, decreasing=F)[1],]
    data$elec_index_next[i] <- nextelection$elec_index
    data$YearData_next[i] <- nextelection$YearData
    if(!is.na(data$mayor_name_final[i])){
      if(data$YearData_next[i]-data$YearData[i] <= 6){ # only use data for "next election" if reasonable time frame
        if(data$mayor_name_final[i] %in% nextelection[,c("mayor_name_final","runnerup_name_final")]){
          data$mayor_run_next[i] <- 1
          data$use2[i] <- 1
        } 
        if(!(data$mayor_name_final[i] %in% nextelection[,c("mayor_name_final","runnerup_name_final")])){
          data$mayor_run_next[i] <- 0
        }
      }
      # The runner-up check sits inside the "mayor name present" branch but
      # outside the 6-year window test above.
      if(!is.na(data$runnerup_name_final[i])){
        if(data$runnerup_name_final[i] %in% nextelection[,c("mayor_name_final","runnerup_name_final")]){
          data$runnerup_run_next[i] <- 1
          data$use2[i] <- 1
        } 
        if(!(data$runnerup_name_final[i] %in% nextelection[,c("mayor_name_final","runnerup_name_final")])){
          data$runnerup_run_next[i] <- 0
        }
      }
    }
    if(is.na(data$mayor_name_final[i])){
      data$mayor_run_next[i] <- NA
    }
    if(is.na(data$runnerup_name_final[i])){
      data$runnerup_run_next[i] <- NA
    }
    # Neither candidate appears in the next election.
    if(!(data$mayor_name_final[i] %in% nextelection[,c("mayor_name_final","runnerup_name_final")]) & !(data$runnerup_name_final[i] %in% nextelection[,c("mayor_name_final","runnerup_name_final")])){
      data$use2[i] <- 0
    }
    # NOTE(review): both operands test runnerup_name_final; one of them was
    # presumably meant to be mayor_name_final. Left unchanged.
    if(is.na(data$runnerup_name_final[i]) & is.na(data$runnerup_name_final[i])){
      data$use2[i] <- 0
    }
  }
  # No later election, or the next one is more than 6 years away: discard.
  if(nrow(futureyears)<1 | (data$YearData_next[i]-data$YearData[i] > 6)){
    data$mayor_run_next[i] <- NA
    data$runnerup_run_next[i] <- NA
    data$use2[i] <- 0
  }
  
  # Reproduce bug
  # Stops the whole loop at the first row whose use2 is still NA, so later
  # rows keep the NA values assigned before the loop.
  # NOTE(review): presumably this mirrors the original code failing at
  # `if (NA == 1)` -- confirm against the authors' replication script.
  if (is.na(data$use2[i])) break
  
  if(data$use2[i]==1){
    thiscity <- data[which(data$fips==data$fips[i]),]
    futureyears <- thiscity[which(thiscity$YearData > data$YearData[i]),]
    
    if(nrow(futureyears)>0){
      # Here the next election is ordered by year only (no month tie-break),
      # unlike the lookup at the top of the loop.
      nextelection <- futureyears[order(futureyears$YearData, decreasing=F)[1],]
      
      if(!is.na(nextelection$mayor_votes_final) & !is.na(nextelection$runnerup_votes_final)){
        # Share of the two-candidate vote, taken from whichever slot
        # (winner or runner-up) the candidate occupies in the next election.
        if(!is.na(data$mayor_name_final[i])){
          if(!is.na(nextelection$mayor_name_final) & data$mayor_name_final[i] == nextelection$mayor_name_final){
            data$mayor_next_VS[i] <- nextelection$mayor_votes_final/(nextelection$mayor_votes_final+nextelection$runnerup_votes_final)
          }
          if(!is.na(nextelection$runnerup_name_final) & data$mayor_name_final[i] == nextelection$runnerup_name_final){
            data$mayor_next_VS[i] <- nextelection$runnerup_votes_final/(nextelection$mayor_votes_final+nextelection$runnerup_votes_final)
          }
        }
        if(is.na(data$mayor_name_final[i])){
          data$mayor_next_VS[i] <- NA
        }
        
        if(!is.na(data$runnerup_name_final[i])){
          if(!is.na(nextelection$mayor_name_final) & data$runnerup_name_final[i] == nextelection$mayor_name_final){
            data$runnerup_next_VS[i] <- nextelection$mayor_votes_final/(nextelection$mayor_votes_final+nextelection$runnerup_votes_final)
          }
          if(!is.na(nextelection$runnerup_name_final) & data$runnerup_name_final[i] == nextelection$runnerup_name_final){
            data$runnerup_next_VS[i] <- nextelection$runnerup_votes_final/(nextelection$mayor_votes_final+nextelection$runnerup_votes_final)
          } 
        }
        if(is.na(data$runnerup_name_final[i])){
          data$runnerup_next_VS[i] <- NA
        }
      }
      # Vote counts missing in the next election: no vote share.
      if(is.na(nextelection$mayor_votes_final) | is.na(nextelection$runnerup_votes_final)){
        data$mayor_next_VS[i] <- NA
        data$runnerup_next_VS[i] <- NA
      }
    }
  }
  
  # NOTE(review): writes mayor_prev_VS / runnerup_prev_VS, which nothing else
  # in this section reads or creates; possibly *_next_VS was intended.
  if(data$use2[i]==0){
    data$mayor_prev_VS[i] <- NA
    data$runnerup_prev_VS[i] <- NA
  }
}

# summary(data$mayor_next_VS);sum(!is.na(data$mayor_next_VS)) # 65.4%, 4203 with data (out of 9186)!
# summary(data$runnerup_next_VS) # 35.3%
# summary(data$mayor_run_next) # 62.16%
# summary(data$runnerup_run_next) # 19.01%

# Two-candidate vote shares and turnout (winner votes + runner-up votes).
data$mayor_voteshare <- with(data, mayor_votes_final / (mayor_votes_final + runnerup_votes_final))
data$runnerup_voteshare <- with(data, runnerup_votes_final / (mayor_votes_final + runnerup_votes_final))
data$turnout <- with(data, mayor_votes_final + runnerup_votes_final)

# checking elec_index matching:
# head(table(data$elec_index_next)[order(table(data$elec_index_next),decreasing = T)],10)
# Manual corrections for cities with several elections in one year
# (originally pointing at 965, 1708, 3270, 4790, 5800).
data$elec_index_next[data$elec_index %in% 4784] <- 4785 # fixing from 4790
data$elec_index_next[data$elec_index %in% 5797] <- 5796 # fixing from 5800
data$elec_index_next[data$elec_index %in% 3268] <- 3269 # fixing from 3270
data$elec_index_next[data$elec_index %in% 964] <- 9398 # fixing from 965
data$elec_index_next[data$elec_index %in% 1705] <- 1704 # fixing from 1708



## election timing:

natyears <- seq(from = 1940, to = 2014, by = 2)
presyears <- seq(from = 1940, to = 2012, by = 4)
midyears <- seq(from = 1942, to = 2014, by = 4)

# Flag elections held concurrently with a national election cycle.
#
# year:        election year (NA allowed).
# month:       election month, numeric or character (NA allowed).
# cycle_years: years in which the relevant national election takes place.
#
# Returns, per election:
#   1  year is a cycle year and month == 11;
#   0  year and month known, but not a November election in a cycle year;
#   0  month unknown, but the year is not a cycle year;
#   NA otherwise (year unknown, or month unknown in a cycle year).
# Vectorised replacement for the former row-by-row loop; it gives the same
# values and also works on a zero-row data frame.
flag_concurrent <- function(year, month, cycle_years) {
  on_cycle <- year %in% cycle_years
  flag <- rep(NA_real_, length(year))
  dated <- !is.na(year) & !is.na(month)
  flag[dated] <- as.numeric(on_cycle[dated] & month[dated] == 11)
  flag[!is.na(year) & is.na(month) & !on_cycle] <- 0
  flag
}

data$concurrent <- flag_concurrent(data$YearData, data$month, natyears)
data$concurrent_pres <- flag_concurrent(data$YearData, data$month, presyears)
data$concurrent_mid <- flag_concurrent(data$YearData, data$month, midyears)
# summary(data$concurrent);table(data$concurrent) # 7.39%, 615 elections
# summary(data$concurrent_pres);table(data$concurrent_pres) # 3.28%, 288 elections

# create lead var
# Each *_next column takes the value from the row whose elec_index equals this
# row's elec_index_next (NA when there is no such row).
data$concurrent_next <- data$concurrent[match(data$elec_index_next,data$elec_index)]
# summary(data$concurrent_next); sum(data$concurrent_next==1,na.rm=T) # only 7.6% concurrent, or 544 elections! (2029 NAs)
data$concurrent_pres_next <- data$concurrent_pres[match(data$elec_index_next,data$elec_index)]
data$concurrent_mid_next <- data$concurrent_mid[match(data$elec_index_next,data$elec_index)]
# summary(data$concurrent_pres_next); sum(data$concurrent_pres_next==1,na.rm=T) # 3.4%, or 266 elections

## Construct measure of demshare in next election:
data$demshare_next <- data$demshare[match(data$elec_index_next,data$elec_index)]

# Where a next election is recorded, set demshare_next to 0 if neither of its
# two candidates has party "D"; otherwise keep the matched value. ifelse()
# yields NA when the party comparison itself is NA.
# NOTE(review): the assignment assumes exactly one row has
# elec_index == elec_index_next[i]; zero or several matches would make the
# replacement the wrong length.
for(i in 1:nrow(data)){
  if(!is.na(data$elec_index_next[i])){
    data$demshare_next[i] <- ifelse((data$mayor_party_final[which(data$elec_index==data$elec_index_next[i])]!="D" & data$runnerup_party_final[which(data$elec_index==data$elec_index_next[i])]!="D"),
                                    0, # change to zero if neither candidate was a Dem
                                    data$demshare_next[i])
  }
}

##########
### Manipulate into a candidate-level dataframe:
##########

# Election-level identifiers shared by the winner and runner-up records.
election_ids <- c("fips","Name","STATEAB","JURIS","YearData","YearData_next","elec_index","elec_index_next","elecdate","month","population_est")

data2 <- data[, c(election_ids,
                  "mayor_name_final","runnerup_name_final",
                  "mayor_party_final","runnerup_party_final",
                  "mayor_voteshare","runnerup_voteshare",
                  "mayor_next_VS","runnerup_next_VS",
                  "mayor_run_next","runnerup_run_next")]

# Winners: melt the three winner measures, cast back to one row per election
# and candidate, then rename to the generic candidate-level names.
mayor_ids <- c(election_ids, "mayor_name_final", "mayor_party_final")
data3 <- melt(data2, id.vars = mayor_ids,
              measure.vars = c("mayor_voteshare","mayor_next_VS","mayor_run_next"))
data4 <- dcast(data3, as.formula(paste(paste(mayor_ids, collapse = " + "), "~ variable")))
mayor_renames <- c(mayor_name_final = "cand_name",
                   mayor_party_final = "cand_party",
                   mayor_voteshare = "voteshare",
                   mayor_next_VS = "next_VS",
                   mayor_run_next = "run_next")
for (old_name in names(mayor_renames)) {
  names(data4)[grep(old_name, x = names(data4))] <- mayor_renames[[old_name]]
}


# Runners-up: same steps with the runner-up columns.
runnerup_ids <- c(election_ids, "runnerup_name_final", "runnerup_party_final")
data3b <- melt(data2, id.vars = runnerup_ids,
               measure.vars = c("runnerup_voteshare","runnerup_next_VS","runnerup_run_next"))
data4b <- dcast(data3b, as.formula(paste(paste(runnerup_ids, collapse = " + "), "~ variable")))
runnerup_renames <- c(runnerup_name_final = "cand_name",
                      runnerup_party_final = "cand_party",
                      runnerup_voteshare = "voteshare",
                      runnerup_next_VS = "next_VS",
                      runnerup_run_next = "run_next")
for (old_name in names(runnerup_renames)) {
  names(data4b)[grep(old_name, x = names(data4b))] <- runnerup_renames[[old_name]]
}

# Stack winners on top of runners-up.
data5 <- rbind(data4, data4b) # all set

# Row of `data` holding each candidate record's own election, and the row
# holding its next election.
own_row <- match(data5$elec_index, data$elec_index)
next_row <- match(data5$elec_index_next, data$elec_index)

# join in turnout, demshare
data5$turnout <- data$turnout[own_row]

# join back in institutions data (taken from the next election's row):
data5$fog <- data$fog[next_row]
data5$partisan <- data$partisan[next_row]
data5$initiative <- data$initiative[next_row]
data5$referendum <- data$referendum[next_row]

data5$concurrent_next <- data$concurrent_next[own_row]
data5$concurrent_pres_next <- data$concurrent_pres_next[own_row]
data5$concurrent_mid_next <- data$concurrent_mid_next[own_row]

# summary(data5$concurrent_next) # still 7.6%, double the NAs from election-level data (3446)
# summary(data5$concurrent_pres_next) #  3.4%
# summary(data5$concurrent_mid_next) #  3.9%

# construct run/win combo dummy variable: 1 if the candidate ran again and
# got at least half of the two-candidate vote, 0 otherwise; only defined
# where both run_next and next_VS are known.
data5$run_and_win_next <- NA
scored <- !is.na(data5$run_next) & !is.na(data5$next_VS)
data5$run_and_win_next[scored] <- ifelse(
  data5$run_next[scored] == 1 & data5$next_VS[scored] >= 0.5, 1, 0
)
summary(data5$run_and_win_next) # 69.3% of candidates
# table(data5$run_and_win_next,data5$run_next) # shows that people who don't run are coded NA in run_and_win, need to change to 0
did_not_run <- which(data5$run_next == 0)
if (length(did_not_run) > 0) {
  data5$run_and_win_next[did_not_run] <- 0
}
summary(data5$run_and_win_next) # 24.7% of candidates

#####
### Prep variables to join with media market data:
#####

# Rules are applied one after another, in this order, to the upper-case
# jurisdiction name; order matters (e.g. " TOWNSHIP" before " TOWN").
upper_case_rules <- list(
  c(" TOWNSHIP", ""),
  c(" TOWN", ""),
  c(" CITY AND BOROUGH", ""),
  c(" CITY AND COUNTY", ""),
  c(" CITY-PARISH", ""),
  c(" CITY", ""),
  c(" VILLAGE", ""),
  c("ST ", "St. "),
  c("FT ", "Fort "),
  c("MT ", "Mount "),
  c("WASHINGTON DC", "Washington"),
  c("WILKES BARRE", "WILKES-BARRE"),
  c("WINSTON SALEM", "WINSTON-SALEM"),
  c(" MUNICIPALITY", "")
)
data5$city <- data5$Name
for (rule in upper_case_rules) {
  data5$city <- gsub(rule[1], rule[2], data5$city)
}
data5$city <- tolower(data5$city)

# Collapse the Lexington-Fayette variants, again in the original order.
lexington_variants <- c(
  "lexington-fayette urban co govt",
  "lexington-fayette urban county",
  "lexington-fayette urban county g",
  "lexington-fayette urban county government"
)
for (variant in lexington_variants) {
  data5$city <- gsub(variant, "lexington", data5$city)
}

#####
### Merge with media data: 
#####
media$city <- tolower(media$city)
# Left join: every candidate record is kept; media columns are NA where no
# (city, state, next-election year) match exists.
data6 <- merge(data5, media,
               by.x = c("city", "STATEAB", "YearData_next"),
               by.y = c("city", "abb", "years"),
               all.x = TRUE)
data6 <- subset(data6, YearData >= 1950)
# Vote share centred at the 50% threshold.
data6$voteshare_adj <- data6$voteshare - 0.5
# fog2: fog 1 -> 1, fog 2 -> 0, anything else (including NA) -> NA.
# Written in base R because the bare recode() call resolved to dplyr::recode
# under the packages attached by this file, and dplyr::recode does not parse
# the car-style "1=1;2=0;else=NA" specification.
data6$fog2 <- ifelse(data6$fog == 1, 1, ifelse(data6$fog == 2, 0, NA))

## calculate dailypaper difference variable (if it gained/lost paper):
## current dailypaper minus the value in the city's most recent earlier year.
data6$dailypaper_delta <- NULL
for (row in 1:nrow(data6)) {
  same_city <- data6[which(data6$fips == data6$fips[row]), ]
  earlier <- same_city[which(same_city$YearData < data6$YearData[row]), ]
  if (nrow(earlier) > 0) {
    earlier <- earlier[order(earlier$YearData, decreasing = TRUE), ]
    data6$dailypaper_delta[row] <- data6$dailypaper[row] - earlier$dailypaper[1]
  } else {
    data6$dailypaper_delta[row] <- NA
  }
}
table(data6$dailypaper_delta) 

## dailypaper_delta2: the delta recorded at the city's most recent earlier year.
data6$dailypaper_delta2 <- NULL
for (row in 1:nrow(data6)) {
  same_city <- data6[which(data6$fips == data6$fips[row]), ]
  earlier <- same_city[which(same_city$YearData < data6$YearData[row]), ]
  if (nrow(earlier) > 0) {
    earlier <- earlier[order(earlier$YearData, decreasing = TRUE), ]
    data6$dailypaper_delta2[row] <- earlier$dailypaper_delta[1]
  } else {
    data6$dailypaper_delta2[row] <- NA
  }
}
table(data6$dailypaper_delta2) # 68,22, 9140

# delta anticipatory variables:
# lead  = dailypaper_delta at the city's next later year;
# lead2 = dailypaper_delta at the later year after that (NA if only one
#         distinct later year exists).
data6$dailypaper_delta_lead <- NULL
data6$dailypaper_delta_lead2 <- NULL
for (row in 1:nrow(data6)) {
  same_city <- data6[which(data6$fips == data6$fips[row]), ]
  later <- same_city[which(same_city$YearData > data6$YearData[row]), ]
  if (nrow(later) > 0) {
    later <- later[order(later$YearData, decreasing = FALSE), ]
    data6$dailypaper_delta_lead[row] <- later$dailypaper_delta[1]
    if (length(unique(later$YearData)) > 1) {
      beyond_next <- which(later$YearData > later$YearData[1])
      data6$dailypaper_delta_lead2[row] <- later$dailypaper_delta[beyond_next][1]
    } else {
      data6$dailypaper_delta_lead2[row] <- NA
    }
  } else {
    data6$dailypaper_delta_lead[row] <- NA
    data6$dailypaper_delta_lead2[row] <- NA
  }
}


## election timing difference var:
## next election's concurrency minus the current election's concurrency.
data6$concurrent <- data$concurrent[match(data6$elec_index, data$elec_index)]
data6$concurrent_delta <- data6$concurrent_next - data6$concurrent


# delta anticipatory variables (in advance of a change in election timing):
# lead  = concurrent_delta at the city's next later year;
# lead2 = concurrent_delta at the later year after that (NA if only one
#         distinct later year exists).
data6$concurrent_delta_lead <- NULL
data6$concurrent_delta_lead2 <- NULL
for (row in 1:nrow(data6)) {
  same_city <- data6[which(data6$fips == data6$fips[row]), ]
  later <- same_city[which(same_city$YearData > data6$YearData[row]), ]
  if (nrow(later) > 0) {
    later <- later[order(later$YearData, decreasing = FALSE), ]
    data6$concurrent_delta_lead[row] <- later$concurrent_delta[1]
    if (length(unique(later$YearData)) > 1) {
      beyond_next <- which(later$YearData > later$YearData[1])
      data6$concurrent_delta_lead2[row] <- later$concurrent_delta[beyond_next][1]
    } else {
      data6$concurrent_delta_lead2[row] <- NA
    }
  } else {
    data6$concurrent_delta_lead[row] <- NA
    data6$concurrent_delta_lead2[row] <- NA
  }
}
table(data6$concurrent_delta_lead) # 282 about to go off-cycle, 196 about to go on-cycle
table(data6$concurrent_delta_lead2) # 266, 182

# concurrent_delta2: the concurrent_delta recorded at the city's most recent
# earlier year (NA when the city has no earlier year).
data6$concurrent_delta2 <- NULL
for (row in 1:nrow(data6)) {
  same_city <- data6[which(data6$fips == data6$fips[row]), ]
  earlier <- same_city[which(same_city$YearData < data6$YearData[row]), ]
  if (nrow(earlier) > 0) {
    earlier <- earlier[order(earlier$YearData, decreasing = TRUE), ]
    data6$concurrent_delta2[row] <- earlier$concurrent_delta[1]
  } else {
    data6$concurrent_delta2[row] <- NA
  }
}
sum(data6$concurrent_delta2==1,na.rm=TRUE) 
sum(!is.na(data6$concurrent_delta2)) 

# Sanity counts (values in the trailing comments are from the original run).
nrow(data) # 9131 elections
# Drop candidate records with an empty (or missing) name.
data6 <- subset(data6, cand_name != "") # 16120 obs
length(unique(data6$elec_index)) # 8974
sum(!duplicated(data[, c("fips","YearData","mayor_votes_final")])) # 9128 unique election-years (multi elections in some years)
length(unique(data$fips)) # 1024 cities
length(unique(data6$fips)) # 1024 cities
named_rows <- which(data6$cand_name != "")
nrow(unique(data6[named_rows, c("fips","cand_name")])) # 9919 unique candidates
nrow(unique(data6[named_rows, c("elec_index","cand_name")])) # 16086 individual obs

write.csv(data6, file = "data/cl-replication-data/dbk_2018.csv", row.names = FALSE)

#dbk_warshaw_2016:

# NOTE(review): `data2` written here is presumably the object supplied by this
# .RData file (replacing the dbk_2018 `data2` built earlier) -- confirm.
load(file = rep_path("dbk_warshaw_2016/mayors_rdd_analysis_final.RData"))

write.csv(data2, file = "data/cl-replication-data/dbk_warshaw_2016.csv")

#eggers_hainmueller_2009:

egg_hain_df <- read_csv(rep_path("eggers_hainmueller_2009/MPs.csv"))

#eggers_hainmueller_2009a
# Rows with party == "tory".

egg_hain_cons_df <- filter(egg_hain_df, party == "tory")

write.csv(egg_hain_cons_df, file = "data/cl-replication-data/eggers_hainmueller_2009a.csv")

#eggers_hainmueller_2009b
# Rows with party == "labour".
egg_hain_lab_df <- filter(egg_hain_df, party == "labour")

write.csv(egg_hain_lab_df, file = "data/cl-replication-data/eggers_hainmueller_2009b.csv")


#eggers_spirling_2017:

D <- read.csv(file = rep_path("eggers_spirling_2017/combined_data_to_2010_all_20150128.csv"))
D$decade <- 10 * floor(D$year / 10)

# Period indicators.
early <- D$year > 1900 & D$year < 1950
late <- D$year >= 1950

# Sample indicators by period and by the pair of parties in the race.
con.early <- early & D$rv.type %in% c("con-lab", "lib-con")
lab.early <- early & D$rv.type %in% c("con-lab", "lib-lab")
con.late <- late & D$rv.type %in% c("con-lab", "lib-con")
lab.late <- late & D$rv.type %in% c("con-lab", "lib-lab")
con.all <- (early | late) & D$rv.type %in% c("con-lab", "lib-con")
lab.all <- (early | late) & D$rv.type %in% c("con-lab", "lib-lab")

# TRUE when the race type involves the Liberals, FALSE for con-lab races,
# NA for any other type.
D$lib.opponent <- NA
D$lib.opponent[D$rv.type %in% c("lib-con", "lib-lab")] <- TRUE
D$lib.opponent[D$rv.type %in% "con-lab"] <- FALSE

# Analysis sample: year >= 1950 and race type con-lab or lib-con.
late <- D$year >= 1950
D$use <- late & D$rv.type %in% c("con-lab", "lib-con")
D <- filter(D, use)

#subset to variables we need: con.vote_share, con.rv, and con.winners

egg_spir_analysis_df <- D[, c("con.vote_share", "con.rv", "con.winners")]

write.csv(egg_spir_analysis_df, file = "data/cl-replication-data/eggers_spirling_2017.csv")

#erikson_etal_2015

# Read the intermediate cleaned file and drop rows where the running variable
# or pres_change_reg_1 is missing.
erikson_etal_df <- drop_na(
  read.csv(file = "raw-data/intermediate-clean/erikson_etal_2015/erikson_etal_2015.csv"),
  rv, pres_change_reg_1
)

write.csv(erikson_etal_df, file = "data/cl-replication-data/erikson_etal_2015.csv", row.names = FALSE)

#ferwerda_miller_2014

ferwerda_miller_df <- read.dta13(file = rep_path("ferwerda_miller_2014/FM_France.dta"))

# Signed distance: near_km is negated where south == 1 and kept as is
# otherwise (mirrors the two Stata `replace near_km_n = ...` statements).
ferwerda_miller_df <- mutate(
  ferwerda_miller_df,
  near_km_n = ifelse(south == 1, -near_km, near_km)
)

write.csv(ferwerda_miller_df, file = "data/cl-replication-data/ferwerda_miller_2014.csv")

#Fiva_smith_2018:

#data available only through an online repository

#folke_snyder_2012:

# NOTE(review): the input is read from the erikson_etal_2015 folder; confirm
# that this is where the folke_snyder_2012 data actually lives.
folke_snyder_df <- read.dta13(file = rep_path("erikson_etal_2015/tmp_data_to_use.dta"))

folke_snyder_df <- filter(folke_snyder_df, year >= 1882 & year <= 2010)

#Code from .do replication file for polynomial RDD fit: 
#reg  dem_lhpct_chng  dem_G  D_1 D_2 D_3      dem_congpct_chng  if election_G==0 & period_`i'==1 & D_1 != .;
#still seems to be a discrepancy in the number of observations reported

# Keep rows with a non-missing y (lower-house change minus congressional
# change) and a non-missing D_1, in non-gubernatorial-election years
# (election_G == 0) of period 1.
folke_snyder_df1 <- folke_snyder_df %>%
  mutate(y = dem_lhpct_chng - dem_congpct_chng) %>%
  filter(
    !is.na(y),
    election_G == 0,
    period_1 == 1,
    !is.na(D_1)
  )

#subset to variables needed for analysis: dem_lhpct_chng, D_1

folke_snyder_analysis_df <- folke_snyder_df1[, c("dem_lhpct_chng", "D_1")]

write.csv(folke_snyder_analysis_df, file = "data/cl-replication-data/folke_snyder_2012.csv", row.names = FALSE)

#fouirnaies_hall_2014:

# One source file feeds both outputs; it is split on the statelevel flag.
fouir_hall_df <- rep_path(
  "fouirnaies_hall_2014/fouirnaies_hall_financial_incumbency_advantage.dta"
) %>%
  read.dta13()

#fouirnaies_hall_2014a:

# Rows with statelevel == 0, reduced to the columns dv_money and rv.
fouir_hall_a_df <- filter(fouir_hall_df, statelevel == 0)

fouir_hall_a_analysis_df <- fouir_hall_a_df[c("dv_money", "rv")]

write.csv(
  fouir_hall_a_analysis_df,
  file = "data/cl-replication-data/fouirnaies_hall_2014a.csv",
  row.names = FALSE
)

#fouirnaies_hall_2014b:

# Rows with statelevel == 1, reduced to the same two columns.
fouir_hall_b_df <- filter(fouir_hall_df, statelevel == 1)

fouir_hall_b_analysis_df <- fouir_hall_b_df[c("dv_money", "rv")]

write.csv(
  fouir_hall_b_analysis_df,
  file = "data/cl-replication-data/fouirnaies_hall_2014b.csv",
  row.names = FALSE
)

#gerber_hopkins_2011:

# Copied through unchanged: read the replication CSV and write it back out
# without row names.
gerber_hop_df <- rep_path("gerber_hopkins_2011/cityreplication042811.csv") %>%
  read_csv()

write.csv(
  gerber_hop_df,
  file = "data/cl-replication-data/gerber_hopkins_2011.csv",
  row.names = FALSE
)

#gerber_etal_2011:

# Copied through unchanged from the intermediate file; row names are written
# (write.csv default).
gerber_etal_df <- read.csv(
  "raw-data/intermediate-clean/gerber_etal_2011/gerber_etal_2011.csv"
)

write.csv(
  gerber_etal_df,
  file = "data/cl-replication-data/gerber_etal_2011.csv"
)

#gulzar_pasquale_2017:

gulz_pasq_df <- rep_path(
  "gulzar_pasquale_2017/gulzer_pasquale_2017/dta/replication_post_yearly_gp.dta"
) %>%
  read.dta13()

# Running variable: km where split == 1, km with its sign flipped elsewhere.
gulz_pasq_df <- mutate(
  gulz_pasq_df,
  running = ifelse(split == 1, km, km * -1)
)

# Only these four columns are exported.
gulz_pasq_df <- gulz_pasq_df[c("workdays", "worked", "running", "blockid")]

write.csv(
  gulz_pasq_df,
  file = "data/cl-replication-data/gulzar_pasquale_2017.csv",
  row.names = FALSE
)

#hainmueller_etal_2017: data unavailable publicly

#hall_2015:

hall_df <- rep_path("hall_2015/primary_analysis.dta") %>%
  read.dta13()

# Median of absdist over all rows; only rows strictly above it are kept.
# NOTE(review): the hall_thompson_2018 section uses >= for the same kind of
# cut - confirm that the strict inequality here is intended.
cutoff <- median(hall_df$absdist)

hall_df <- filter(hall_df, absdist > cutoff)

# Columns exported: dv, rv, dv_win.
hall_df <- hall_df[c("dv", "rv", "dv_win")]

write.csv(hall_df, file = "data/cl-replication-data/hall_2015.csv")

#hall_thompson_2018:

hall_thomp_df <- rep_path("hall_thompson_2018/rd_analysis_hs.dta") %>%
  read.dta13()

# Rows whose absdist is at or above the median of absdist are kept; all
# columns are exported.
cutoff <- median(hall_thomp_df$absdist)

hall_thomp_analysis_df <- filter(hall_thomp_df, absdist >= cutoff)

write.csv(
  hall_thomp_analysis_df,
  file = "data/cl-replication-data/hall_thompson_2018.csv"
)

#hidalgo_nichter_2016:

# The code below relies on load() creating an object called `data`.
load(rep_path("hidalgo_nichter_2016/replication_data.RData"))

# outcome is the difference electorate.perpop08 - electorate.perpop07.
data <- mutate(data, outcome = electorate.perpop08 - electorate.perpop07)

write.csv(
  data,
  file = "data/cl-replication-data/hidalgo_nichter_2016.csv",
  row.names = FALSE
)

#hirano_2011: data not available

#holbein_2016: data not publicly available

#holbein_hillygus_2016:

holbein_hillygus_df <- rep_path(
  "holbein_hillygus_2016/Pre-Reg FL Data 8.6.13.dta"
) %>%
  read.dta13()

# Only voted_2012 and days_ratio_flipped are exported.
holbein_hillygus_df <- holbein_hillygus_df[c("voted_2012", "days_ratio_flipped")]

write.csv(
  holbein_hillygus_df,
  file = "data/cl-replication-data/holbein_hillygus_2016.csv",
  row.names = FALSE
)

#klasnja_titiunik_2017:

# Straight conversion from Stata to CSV: no filtering, no column selection.
klasnja_titiunik_df <- rep_path(
  "klasnja_titiunik_2017/KlasnjaTitiunik-Brazil-data.dta"
) %>%
  read.dta13()

write.csv(
  klasnja_titiunik_df,
  file = "data/cl-replication-data/klasnja_titiunik_2017.csv"
)

#klasnja_2015:

# Straight conversion from Stata to CSV: no filtering, no column selection.
klasnja_df <- rep_path("klasnja_2015/dataverse_files (28)/electoral_data.dta") %>%
  read.dta13()

write.csv(klasnja_df, file = "data/cl-replication-data/klasnja_2015.csv")

#larreguy_etal_2016:

larreguy_etal_df <- rep_path(
  "larreguy_etal_2016/replication_files/Replication_Data.dta"
) %>%
  read.dta13()

# Columns exported: id_estado, running, turnout and the three share_*_ln
# variables (pri, pan, prd).
larreguy_etal_df <- larreguy_etal_df[c(
  "id_estado", "running", "turnout",
  "share_pri_ln", "share_pan_ln", "share_prd_ln"
)]

write.csv(
  larreguy_etal_df,
  file = "data/cl-replication-data/larreguy_etal_2016.csv",
  row.names = FALSE
)

#lopesdefonseca_2017:

# Copied through unchanged from the intermediate file, without row names.
lopesdefonseca_df <- read.csv(
  "raw-data/intermediate-clean/lopesdefonseca_2017/lopesdefonseca_2017.csv"
)

write.csv(
  lopesdefonseca_df,
  file = "data/cl-replication-data/lopesdefonseca_2017.csv",
  row.names = FALSE
)

#mo_conn_2018: data not publicly available

#novaes_2018:

# The code below relies on load() creating an object called `rd`.
load(rep_path("novaes_2018/complete_dataset.RData"))

tt <- rd

# Rows from 2008 with ineligible == 0 and attrition == 0.
novaes_2008 <- subset(tt, year == 2008 & ineligible == 0 & attrition == 0)

# Split on the sign of margin (rows with margin == 0 or missing fall out),
# then stack the negative-margin rows on top of the positive-margin rows.
uno <- subset(novaes_2008, margin < 0)
dos <- subset(novaes_2008, margin > 0)
tres <- rbind(uno, dos)

write.csv(tres, file = "data/cl-replication-data/novaes_2018.csv")

#palmer_schneer_2016:

# The senate and governor files go through the same steps:
#   1. keep rows with first_election == 1 and eligible0 == 1,
#   2. margin = vote_g_pct_2p - .5,
#   3. export d_board0, margin and everwon.

# Senate (output file suffix "a").
palm_schneer_sen_df <- rep_path("palmer_schneer_2016/senate_boards_final.dta") %>%
  read.dta13() %>%
  filter(first_election == 1, eligible0 == 1) %>%
  mutate(margin = vote_g_pct_2p - .5)

palm_schneer_sen_df <- palm_schneer_sen_df[c("d_board0", "margin", "everwon")]

write.csv(
  palm_schneer_sen_df,
  file = "data/cl-replication-data/palmer_schneer_2016a.csv"
)

# Governor (output file suffix "b").
palm_schneer_gov_df <- rep_path("palmer_schneer_2016/governor_boards_final.dta") %>%
  read.dta13() %>%
  filter(first_election == 1, eligible0 == 1) %>%
  mutate(margin = vote_g_pct_2p - .5)

palm_schneer_gov_df <- palm_schneer_gov_df[c("d_board0", "margin", "everwon")]

write.csv(
  palm_schneer_gov_df,
  file = "data/cl-replication-data/palmer_schneer_2016b.csv"
)

#rozenas_etal_2017

# The code below relies on load() creating an object called `rdd`.
load(rep_path("rozenas_etal_2017/replication.RData"))

data <- rdd
# el: year and type joined with a dash.
data$el <- paste(data$year, data$type, sep = "-")
# diff: rayon_repression minus neighbor_repression.
data$diff <- data$rayon_repression - data$neighbor_repression
# d: distance divided by 1000, positive where diff >= 0 and negative where
# diff < 0.
data$d <- with(
  data,
  distance * (diff >= 0) - distance * (diff < 0)
) / 1000

## Auxiliary functions used below:

# Split every element of x on the pattern w and return the n-th piece of each.
strsplitm <- function(x, w, n) {
  unlist(lapply(strsplit(x, w), `[`, n))
}

# Select pairs of historical rayons such that one is at least w standard
# deviations below the mean and the other at least w standard deviations above
# the mean, restricted to rows with abs(d) < 10. Reads the global `data`.
# The returned data frame gains a factor column `rayon` holding the first
# "|"-separated piece of `pair`.
s <- function(w) {
  ndata <- subset(
    data,
    (
      (scale(rayon_repression) < -w & scale(neighbor_repression) > w) |
        (scale(rayon_repression) > w & scale(neighbor_repression) < -w)
    ) &
      abs(d) < 10
  )
  ndata$rayon <- factor(strsplitm(as.character(ndata$pair), "\\|", 1))
  ndata
}

ndata <- s(1)

# Standardize the two analysis variables within the selected subsample.

ndata$russian_margin <- scale(ndata$russian_margin)
ndata$rayon_repression <- scale(ndata$rayon_repression)

write.csv(ndata, file = "data/cl-replication-data/rozenas_etal_2017.csv")

#sances_2017:

# Steps, in order:
#   1. convert year to numeric by way of character,
#   2. keep rows with year < 2012,
#   3. sort by town, and within town by year from latest to earliest.
sances_df <- rep_path("sances_2017/dataset.dta") %>%
  read.dta13() %>%
  mutate(year = as.numeric(as.character(year))) %>%
  filter(year < 2012) %>%
  arrange(town, desc(year))

write.csv(sances_df, file = "data/cl-replication-data/sances_2017.csv")

#schickler_etal_2009:

# Pooled dataset shared by the four schickler_etal_2009 outputs (a-d).
schickler_etal_df <- rep_path(
  "schickler_2009/replication materials for figures 5a to 5d/pooled dataset with district info 140328_STATA12.dta"
) %>%
  read.dta13()

#schickler_etal_2009a:

# Rows with south == 0, exnotfull == 0, sdem == 0, party3 > 0, cong == 73 and
# petind == 14.
schickler_a_df <- filter(
  schickler_etal_df,
  south == 0,
  exnotfull == 0,
  sdem == 0,
  party3 > 0,
  cong == 73,
  petind == 14
)

# Columns exported: petpooled, Dvotespercent, icpsr.
schickler_a_df <- schickler_a_df[c("petpooled", "Dvotespercent", "icpsr")]

write.csv(
  schickler_a_df,
  file = "data/cl-replication-data/schickler_etal_2009a.csv",
  row.names = FALSE
)

#schickler_etal_2009b:

# Rows with south == 0, exnotfull == 0, sdem == 0, party3 > 0, cong == 78 and
# petind equal to 3, 5 or 18.
schickler_b_df <- filter(
  schickler_etal_df,
  south == 0,
  exnotfull == 0,
  sdem == 0,
  party3 > 0,
  cong == 78,
  petind %in% c(3, 5, 18)
)

# Columns exported: petpooled, Dvotespercent, icpsr.
schickler_b_df <- schickler_b_df[c("petpooled", "Dvotespercent", "icpsr")]

write.csv(
  schickler_b_df,
  file = "data/cl-replication-data/schickler_etal_2009b.csv",
  row.names = FALSE
)

#schickler_etal_2009c:

# Rows with south == 0, exnotfull == 0, sdem == 0, party3 > 0, cong == 83 and
# petind equal to 4 or 5.
schickler_c_df <- filter(
  schickler_etal_df,
  south == 0,
  exnotfull == 0,
  sdem == 0,
  party3 > 0,
  cong == 83,
  petind %in% c(4, 5)
)

# Columns exported: petpooled, Dvotespercent, icpsr.
schickler_c_df <- schickler_c_df[c("petpooled", "Dvotespercent", "icpsr")]

write.csv(
  schickler_c_df,
  file = "data/cl-replication-data/schickler_etal_2009c.csv",
  row.names = FALSE
)

#schickler_etal_2009d:

# Rows with south == 0, exnotfull == 0, sdem == 0, party3 > 0, cong == 88 and
# petind == 5.
schickler_d_df <- filter(
  schickler_etal_df,
  south == 0,
  exnotfull == 0,
  sdem == 0,
  party3 > 0,
  cong == 88,
  petind == 5
)

# Columns exported: petpooled, Dvotespercent, icpsr.
schickler_d_df <- schickler_d_df[c("petpooled", "Dvotespercent", "icpsr")]

write.csv(
  schickler_d_df,
  file = "data/cl-replication-data/schickler_etal_2009d.csv",
  row.names = FALSE
)

#szakonyi_2018:

# Two workspaces are loaded. Only the object `cons` is used below; presumably
# it comes from one of these files - verify which.
load(rep_path("szakonyi_2018/Candidates.RData"))
load(rep_path("szakonyi_2018/ConnectedFirms.RData"))

# Keep the rows of cons where party_win is 0.
cons <- subset(cons, subset = party_win == 0)

write.csv(
  cons,
  file = "data/cl-replication-data/szakonyi_2018.csv",
  row.names = FALSE
)

#xu_yao_2015: 

# Straight conversion from Stata to CSV: no filtering, no column selection.
xu_yao_df <- rep_path(
  "xu_yao_2015/Xu_Yao_2015_Replication/XuYao2015_informal_jan15.dta"
) %>%
  read.dta13()

write.csv(xu_yao_df, file = "data/cl-replication-data/xu_yao_2015.csv")
